********************************************************************************
*	OVERVIEW
*		This is the cleaning code for the paper: 
*		"Redesigning payments for ecosystem services to increase cost-effectiveness"
*		Santiago Izquierdo-Tort, Seema Jayachandran and Santiago Saavedra
********************************************************************************


*===============================================================================*
*								1. Treatment assignment
*===============================================================================*
	* Build the landowner-level treatment file: one row per code_id with a
	* numeric treatment indicator derived from the contract name.
	import delimited "${raw}\id_treatment.csv", clear
	rename (id treatment) (code_id treatment_contract)
	tostring code_id, replace
	* Reports how many distinct IDs there are (user-written command)
	unique code_id
	* 1 for Contract A, 0 for Contract B, missing for any other contract text
	generate treatment = cond(treatment_contract == "Contract A", 1, cond(treatment_contract == "Contract B", 0, .))
	label define treat 0 "Control" 1 "Treatment"
	label values treatment treat
	save "${temp}\treatment.dta", replace
	
*===============================================================================*
*								2. Plot-level data 
*===============================================================================*	
	* Aggregate the plot-level remote-sensing file to one row per
	* landowner (code_id) and contract type.
	import delimited "${raw}\final_data_pdf.csv", clear
	rename code_ipa code_id
	unique code_id
	tostring code_id, replace

	* Remote-sensing observed deforestation: flag rows whose lost_forest is negative
	* NOTE(review): the sign convention of lost_forest is not visible here - confirm
	generate polygon_deforestation_d = (lost_forest < 0)

	* Sum the area measures and keep the maximum of the flag within each group
	local area_totals lost_forest pred_defo_area mean_defo_x
	collapse (sum) `area_totals' (max) polygon_deforestation_d, by(code_id contract_type)

	label variable lost_forest "Lost forest"
	label variable polygon_deforestation_d "Deforestation in polygon (=1)"

	save "${temp}\plotlevel.dta", replace
	
	
*===============================================================================*
*								3. Deforestation data
*===============================================================================*	
	* Build three files from the polygon-level deforestation export:
	*   deforestation_polygon.dta      sums and shares by polygon
	*   polygons_no_contract_area.dta  polygons whose summed contract area is 0
	*   deforestation.dta              sums and shares by landowner (code_id)
	import delimited "${raw}\final_data_final_1115.csv", clear

	* Landowner ID is the first four characters of the polygon ID string
	gen code_id=substr(id,1,4)
	unique code_id

	* Readable names for the imported columns
	* (presumably truncated or auto-numbered on import - confirm against the CSV header)
	rename v3 contract_pred_forest_May2022
	rename property_predicted_forest_may202 property_pred_forest_may2021
	rename contract_predicted_forest_may202 contract_pred_forest_may2021
	rename v6 property_pred_forest_may2022
	rename property_predicted_forest_septem property_pred_forest_sep2022
	rename contract_predicted_forest_june20 contract_pred_forest_june2020
	rename property_predicted_forest_june20 property_pred_forest_june2020
	rename contract_predicted_forest_septem contract_pred_forest_sep2022

	* Variables summed in the polygon-level and landowner-level aggregations
	local sumvars forest_loss_may_contract forest_loss_sept_contract ///
		forest_loss_may_property forest_loss_sept_property ///
		contract_pred_forest_may2021 property_pred_forest_may2021 ///
		forest_loss_contract_before forest_loss_property_before ///
		contract_pred_forest_june2020 property_pred_forest_sep2022 ///
		contract_pred_forest_sep2022 property_pred_forest_june2020 ///
		contract_area property_area

	* Helper: generate the six deforestation shares from the summed variables.
	* Used for both aggregation levels so the two files cannot drift apart.
	capture program drop _defo_shares
	program define _defo_shares
		* Deforestation as % of the area contracted
		gen defo_perc_may_contract=forest_loss_may_contract/contract_pred_forest_may2021
		la var defo_perc_may_contract "Deforestation as % of area contracted to may 2022 (base may 2021)"
		gen defo_perc_sept_contract=forest_loss_sept_contract/contract_pred_forest_may2021
		la var defo_perc_sept_contract "Deforestation as % of area contracted to sept 2022 (base may 2021)"

		* Deforestation as % of the property area
		gen defo_perc_may_property=forest_loss_may_property/property_pred_forest_may2021
		la var defo_perc_may_property "Deforestation as % of property area to may 2022 (base may 2021)"
		gen defo_perc_sept_property=forest_loss_sept_property/property_pred_forest_may2021
		la var defo_perc_sept_property "Deforestation as % of property area to sept 2022 (base may 2021)"

		* Previous deforestation as %. The denominator is the June 2020 prediction,
		* so the label states base june 2020 (it used to say june 2022).
		gen forest_loss_contract_before_perc=forest_loss_contract_before/contract_pred_forest_june2020
		la var forest_loss_contract_before_perc "Previous deforestation as % of contract area to may 2021 (base june 2020)"
		gen forest_loss_property_before_perc=forest_loss_property_before/property_pred_forest_june2020
		la var forest_loss_property_before_perc "Previous deforestation as % of property area to may 2021 (base june 2020)"
	end

	* ---- Polygon level ----
	preserve
	rename id polygon_id
	collapse (sum) `sumvars', by(polygon_id)
	_defo_shares
	save "${temp}\deforestation_polygon.dta", replace
	restore

	* ---- Polygons with no contract area ----
	* Only contract_area is needed, so only that variable is collapsed
	preserve
	rename id polygon_id
	collapse (sum) contract_area, by(polygon_id)
	keep if contract_area==0
	save "${temp}\polygons_no_contract_area.dta", replace
	restore

	* ---- Landowner level ----
	collapse (sum) `sumvars', by(code_id)

	label var forest_loss_may_contract "Forest loss in contracted areas from may 2021 to may 2022"
	label var forest_loss_sept_contract "Forest loss in contracted areas from may 2021 to sept 2022"
	label var forest_loss_may_property "Forest loss in total property area from may 2021 to may 2022"
	label var forest_loss_sept_property "Forest loss in total property area from may 2021 to sept 2022"
	label var forest_loss_contract_before "Difference of prediction between June 2020 and May2021 on contract"
	label var forest_loss_property_before "Difference of prediction between June 2020 and May2021 on property."

	_defo_shares

	save "${temp}\deforestation.dta", replace

	* Do not leave the helper defined for the rest of the session
	program drop _defo_shares
	
	
	
	
	
*===============================================================================*
*								3b. Baseline data 
*===============================================================================*
	* Load the baseline survey, attach treatment status and blank out the
	* survey missing codes in every numeric variable.
	use "${raw}\pes_baseline_clean_nopii.dta", clear	// N = 83
	rename id code_id
	tostring code_id, replace
	unique code_id

	* Add treatment status (all rows from both files are kept; no _merge variable)
	merge 1:1 code_id using "${temp}\treatment.dta", nogen keepusing(treatment)

	* Survey codes -97, -98, -99 and the extended missings .d and .r all become
	* system missing
	ds , has(type numeric)
	local numeric_vars `r(varlist)'
	foreach v in `numeric_vars' {
		replace `v' = . if `v'==-98 | `v'==-99 | `v'==-97 | `v'==.d | `v'==.r
	}
	
	* Share of plots whose ganado_455 answer equals 1, among plots with a
	* non-missing answer, plus a binary for any such plot.
	gen numerator = 0
	gen denominator = 0
	foreach var of varlist ganado_455_* {
			replace denominator = denominator + 1 if !mi(`var')
			replace numerator = numerator + 1 if `var' == 1 & !mi(`var') 
	}
	* Missing (0/0) when no plot has an answer
	gen ganado_455_perc = numerator/denominator
	* Stata treats missing as larger than any number, so an unguarded
	* comparison would code respondents with no answers as 1. Keep them missing.
	gen ganado_455_bin = ganado_455_perc > 0 if !mi(ganado_455_perc)
	drop denominator numerator	
		
	* Has monetary savings: savings type coded 0 or 2; left missing when the type is .
	generate has_savings = inlist(tipo_ahorro_37, 0, 2) if tipo_ahorro_37 != .

	* Land size summed across plots
	egen size_land_ha_48_tot = rowtotal(size_land_ha_48_*)

	* Percent of plots on a slope: plots with a value above 0 divided by plots
	* with a non-missing value
	generate pendiente_49_bintot = 0
	generate denominator = 0
	forvalues plot = 1/6 {
		replace denominator = denominator + !missing(pendiente_49_`plot')
		replace pendiente_49_bintot = pendiente_49_bintot + (pendiente_49_`plot' > 0 & !missing(pendiente_49_`plot'))
	}
	generate pendiente_49_perc = pendiente_49_bintot / denominator
	drop denominator

	* Will deforest if not enrolled in program: 1 when the stated amount is
	* positive, 0 otherwise (a missing answer is coded 0 here)
	foreach q in 471 473 {
		generate inscritopsa_`q'_bin = (inscritopsa_`q' > 0) & (inscritopsa_`q' != .)
	}
	egen deforest_nopsa = rowmax(inscritopsa_471_bin inscritopsa_473_bin)
	

	* Has deforested (binary). The plot-level answers are coded 0 = Yes, 1 = No,
	* so plot 1 is flipped and any other plot answering 0 switches the flag on.
	gen desmonte_423_bin = 1 - desmonte_423_1
	foreach var of varlist desmonte_423_2 desmonte_423_3 desmonte_423_4 desmonte_423_5 desmonte_423_6 {
		replace desmonte_423_bin = 1 if `var' == 0 & !mi(`var') //NOTE: the plot by plot binaries are reported as 0 Yes 1 No  
	}

	* Smallest non-missing value of desmon_year_424 across the six plots.
	* A raw value of 2021 is first replaced by .5 in the plot variables themselves.
	gen desmon_year_424_min = .
	foreach var of varlist desmon_year_424_1 desmon_year_424_2 desmon_year_424_3 desmon_year_424_4 desmon_year_424_5 desmon_year_424_6 {
		replace `var' = .5 if `var' == 2021
		* The running minimum is a variable, not a local macro: test it by name.
		* Take the plot value when no minimum exists yet or when it is smaller.
		replace desmon_year_424_min = `var' if !mi(`var') & (mi(desmon_year_424_min) | `var' < desmon_year_424_min)
	}	
		
		
	* Add previous lost forest: the CSV is converted to a temporary .dta and
	* merged on code_id; merge_forestloss records the match status
	preserve
	import delimited "${raw}\forest_cover_loss_18_19_20.csv", clear
	rename id code_id
	tostring code_id, replace
	tempfile tempforest
	save `tempforest', replace
	restore
	merge 1:1 code_id using `tempforest', gen(merge_forestloss)	

	* Area loss: 2018-2020 row means (missing only if all three years are missing)
	egen area_loss_avg = rowmean(area_loss_2018 area_loss_2019 area_loss_2020)	
	egen area_loss_perc_avg = rowmean(area_loss_perc_2018 area_loss_perc_2019 area_loss_perc_2020)
	sum area_loss_perc_avg, d
	* Above-median indicator. Rows without forest-loss data stay missing;
	* unguarded, missing compares as larger than the median and would be coded 1.
	gen area_loss_perc_avg_bin = area_loss_perc_avg > r(p50) if !mi(area_loss_perc_avg)
	
	* Net monthly income: average month and last month, plus log average income
	generate ingre_gasto_diff_avg = ingre_mes_25 - gasto_mes_23
	generate ingre_gasto_diff_last = ingre_24 - gasto_22
	generate ingre_mes_25_ln = ln(ingre_mes_25)

	* Forest area summed over plots and the PSA-enrolled share of it
	egen forest_area_total = rowtotal(land_use_420a_*)
	generate PSA_enrolled_forest_area = inscr_ha_315
	generate PSA_enrolledland_perc = inscr_ha_315 / forest_area_total

	* % forest (of total land)
	generate forest_perc = forest_area_total / size_land_ha_48_tot

	* Previous PSA experience, both derived from insc_psa_313:
	*   enrolled = 1 for codes 0 or 1, 0 for code 2, missing otherwise
	*   received = 1 for code 0, 0 for codes 1 or 2, missing otherwise
	generate previous_psa_enrolled = cond(inlist(insc_psa_313, 0, 1), 1, cond(insc_psa_313 == 2, 0, .))
	generate previous_psa_received = cond(insc_psa_313 == 0, 1, cond(inlist(insc_psa_313, 1, 2), 0, .))
			
			
	* Risk aversion score: ra1 + ra2 (missing only when both are missing),
	* standardized with the mean and sd of observations that have a treatment status
		egen risk_temp=rowtotal(ra1 ra2), mis
		sum risk_temp if treatment!=.	
		gen risk_aversion=(risk_temp-`r(mean)')/`r(sd)'
		
	* Risk aversion dummy: 1 if the score is above the median, 0 if at or below.
	* Respondents with no score stay missing; an unguarded comparison would code
	* them 1 because Stata treats missing as larger than any number.
	summ risk_aversion if treatment!=., d	
	gen risk_aversion_d = risk_aversion > r(p50) if !mi(risk_aversion)
	* Is or has been beneficiary of PSA: flip benfi_16_1 so that 1 = yes, 0 = no
		label def yesno 1 "yes" 0 "no"
		recode benfi_16_1 (1=0) (0=1), gen(benfi_psa_d)
		* Attach the label to the recoded variable; on the raw benfi_16_1 the
		* yes/no label would read the wrong way round
		label values benfi_psa_d yesno 
		
		
	* Land for livestock (Ha): sum over the six plots, missing if all are missing
	egen land_livestock=rowtotal(land_use_420e_1 land_use_420e_2 land_use_420e_3 land_use_420e_4 land_use_420e_5 land_use_420e_6), mis
	label var land_livestock "Amount of land used for livestock"
	* Land for crops (Ha): same construction
	egen land_crops=rowtotal(land_use_420d_1 land_use_420d_2 land_use_420d_3 land_use_420d_4 land_use_420d_5 land_use_420d_6), mis
	label var land_crops "Amount of land used for crops"	
	* Fences in land: per-plot indicator is 1 when limites_418 is 0, 0 when it
	* is 1 or 2, missing otherwise. The unused tempvar declarations were removed.
	forvalues i=1/6 {
		gen limit_`i'=cond(limites_418_`i'==0,1,cond(limites_418_`i'==1 |limites_418_`i'==2,0,. ))
		}
	* Any plot fenced
	egen fence_land=rowmax(limit_1-limit_6)
	label var fence_land "Land has a fence (=1)"
	
	* Feels that has less access to forest benefits: 1 when acceso_479 is 2,
	* 0 when it is 0 or 1, missing otherwise
	generate pooraccess_forestben = cond(acceso_479 == 2, 1, cond(inlist(acceso_479, 0, 1), 0, .))
	label variable pooraccess_forestben "Feels that has lower access to forest benefits than others (=1)"

	* Natural log of last-month income
	generate ln_income = ln(ingre_24)
	label variable ln_income "Income in baseline (Ln)"

	* Natural log of last-month household expenditure
	generate ln_gasto_22 = ln(gasto_22)

	*-------------------  IV. Attitudes towards the environment --------------------*

/*Recoded as higher is better (pro-environment)*/

	* Damage environment if it improves life conditions (copied as is)
	* NOTE(review): items 1 and 3 are not reversed here - confirm the raw coding direction
	gen att_envir_1= comu_siem_511
	label var att_envir_1 "In order to improve your lifestyle, sometimes you need to damage the environment."

	* Improve income if protecting the environment (scale 0-4 reversed)
	recode comu_siem_512 (4 = 0) (3 = 1) (2 = 2) (1 = 3) (0 = 4), gen(att_envir_2)  
	label var att_envir_2 "You can increase your income if you protect the environment."
	
	* If neighbours do not do anything to benefit the environment, you should not do anything either (copied as is)
	gen att_envir_3 = comu_confi_513
	label var att_envir_3 "If your neighbors do nothing to benefit the environment, no one should expect you to do anything to benefit the environment"

		* Attitudes towards the environment: sum of the three items
		* (plain comment; the former triple-slash marker joined this line to the next command)
		egen attitudes_environment=rowtotal(att_envir_1  att_envir_2 att_envir_3), mis
		label var attitudes_environment "Pro-environment attitudes"		
			* Standardizing with the mean and sd of every observation that has a
			* treatment status (both arms, not the control group alone)
			sum attitudes_environment if treatment !=.
			gen attitudes_environmentstd = (attitudes_environment-`r(mean)')/`r(sd)'	
		label var attitudes_environmentstd "Pro-environment attitudes (std)"		
	
*-------------------  IV. Attitudes towards the community --------------------*

/*Recoded as higher is better (pro-community)*/

	* Trust in members of the family (0/1 flipped)
	recode comu_confi_57 (1=0)(0=1), gen(att_commu_1)
	
	* Trust in members of the community (0/1 flipped)
	recode comu_confi_58 (1=0)(0=1), gen(att_commu_2)
	
	* Trust in NGOs actions (0/1 flipped)
	recode comu_confi_59 (1=0)(0=1), gen(att_commu_3)
	
	* Trust in government (0/1 flipped)
	recode comu_confi_510 (1=0)(0=1), gen(att_commu_4)
	
		* Attitudes towards the community: sum of the four items
		egen attitudes_community=rowtotal(att_commu_1 att_commu_2 att_commu_3 att_commu_4), mis
		label var attitudes_community "Pro-trust attitudes"		
			* Standardizing with the mean and sd of every observation that has a
			* treatment status (both arms, not the control group alone)
			sum attitudes_community if treatment !=.
			gen attitudes_communitystd = (attitudes_community-`r(mean)')/`r(sd)'	
		label var attitudes_communitystd "Pro-trust attitudes (std)"		
	

	
	* English labels for variables of interest.
	* Three labels that were overwritten a few lines later (desmonte_423_bin,
	* inscritopsa_471_bin, dist_carr_210) were removed; the label that used to
	* win is the one kept, so the saved file is unchanged.
	la var deforest_nopsa "Will deforest if not enrolled in PSA program (=1)"
	la var ganado_455_bin "Some plots have no portion used for livestock"
	la var ganado_455_perc "Percent of plots with no portion used for livestock"
	la var pendiente_49_perc "Percent of plots on a slope"
	la var desmonte_423_1 "Have you cut down (desmontar) any of the forest in this land in the last 3 years?"
	la var inscritopsa_471_bin "Over the next year, if you are not in a PES program, will you deforest?"
	la var desmonte_423_bin "Have cut down (desmontar) any of the forest in this land in the last 3 years"
	la var genero_12_1 "Male"
	la var grado_15_1 "Years of school completed" 
	la var gasto_22 "Household expenditure in last month"
	la var ln_gasto_22 "Household expenditure in last month (Ln)"
	la var gasto_mes_23 "Average household expenditure (monthly)"
	la var ingre_24 "Household income in last month"
	la var ingre_mes_25 "Average household income (monthly)" 
	la var ingre_mes_25_ln "Average monthly income (log)"
	la var ingre_gasto_diff_avg "Average net household income (monthly)"
	la var ingre_gasto_diff_last "Net household income in last month"
	la var has_savings "Has monetary savings" 
	la var size_land_ha_48_tot "Land area across all plots (hectares)"
	la var area_loss_perc_avg "Percent forest loss (3 year average)"
	la var area_loss_perc_avg_bin "Percent forest loss $>$ median"
	la var presta_31 "Does your household currently have any loans outstanding that you have not yet repaid?"
	la var emerg_fuente_32 "If your household had an emergency and needed 5,000 MXN, how would you get most of this money?"
	la var desmon_year_424_min "How many years ago did you last deforest (tumbado)?"
	la var desmo_razon_429_1 "Main reason for deforesting (plot 1)"
	la var desmo_razon_429_2 "Main reason for deforesting (plot 2)"
	la var desmo_razon_429_3 "Main reason for deforesting (plot 3)"
	la var comu_siem_511 "In order to improve your lifestyle, sometimes you need to damage the environment."
	la var comu_siem_512 "You can increase your income if you protect the environment"
	la var comu_confi_513 "If your neighbors do nothing to benefit the environment, no one should expect you to do anything to benefit the environment"
	la var comu_prob_517 "Are the problems with the environment now more serious, equal, or less serious than the problems three years ago?"
	la var comu_recu_518 "What do you believe are the causes of environmental problems in your community?"
	la var comu_prob_521 "Should the government enforce laws that prevent people from doing things on their land that harm the environment or not?"
	la var comu_prob_522 "Is it a good idea for your local community to create rules or policies to protect the environment or not?"
	la var comu_prob_523 "What can the people in your community do in order to protect the environment? Can you give me 3 ideas?"
	* NOTE(review): no variable called forest_area is created in this script.
	* Presumably this resolves to forest_area_total through variable-name
	* abbreviation, or to a variable already in the raw file - confirm.
	la var forest_area "Primary forest area total across all plots (hectares)"
	la var PSA_enrolled_forest_area "PSA enrolled forest area across all plots"
	la var PSA_enrolledland_perc "Percentage forest area enrolled in PSA (if enrolled)"	
	label var benfi_psa_d "Has been or is enrolled in a PSA program"
	la var previous_psa_enrolled "Had enrolled previously in a PSA program (whether received it or not)"
	la var previous_psa_received "Had received previously a PSA payment"
	la var risk_aversion "Risk aversion score"
	la var risk_aversion_d "Risk aversion score above the median (=1)"
	la var forest_perc "Percent of forest"
	la var dist_carr_210 "Distance to road (minutes)"

	* Suffix every variable except the ID and treatment with _bl, then save
	ds code_id treatment, not	
	foreach var in `r(varlist)'{
		rename `var' `var'_bl
	}
	save "${temp}\baseline.dta", replace

*===============================================================================*
*								4. Endline data 
*===============================================================================*
	
	* Load the endline survey and attach treatment status, keeping only
	* respondents present in the endline file.
	use "${raw}\lacandona_endline_clean.dta", clear	// N = 60

	rename id_participant code_id
	* NOTE(review): treatment.dta stores code_id as a string; this assumes
	* id_participant is already a string - confirm
	merge 1:1 code_id using "${temp}\treatment.dta", keep(master match) nogenerate keepusing(treatment)

*======================*
*	I. Cleaning
*======================*

	* Survey codes -97, -98, -99 and the extended missings .d and .r all become
	* system missing in every numeric variable
	ds , has(type numeric)
	local numeric_vars `r(varlist)'
	foreach v in `numeric_vars' {
		replace `v' = . if `v'==-97 | `v'==-98 | `v'==-99 | `v'==.d | `v'==.r
	}

	* Change of land use for livestock: in p11_55 a 0 is treated as a typo and
	* set to missing, and 2 is recoded to 0
	recode p11_55 (0 = .) (2=0), gen(change_land_livestock)
	label variable change_land_livestock "Amount of land for livestock has changed during the last year (=1)"

	* Change of land use for crops: copy of p11_50_1
	generate change_land_crops = p11_50_1
	label variable change_land_crops "Amount of land for crops has changed during the last year (=1)"

*-----------------------------  I. Demographics --------------------------------*

	* Female: recode 2 to 0 so the variable is 1 = Female, 0 = Male
	label define sex 0 "Male" 1 "Female"
	recode p3_2 (2=0)
	label values p3_2 sex
	label variable p3_2 "Female"

	* Household income (summary is displayed for inspection only)
	summ p4_4
	label variable p4_4 "Household income in the last month  (MXN)"

	* Has monetary savings: p5_4 coded 1 or 3; left missing when p5_4 is .
	generate has_savings = inlist(p5_4, 1, 3) if p5_4 != .
	label variable has_savings "Saved in the last month (=1)"

	* Household expenses
	label variable p4_2 "Household expenditure in last month (MXN)"
*-----------------------  II. Self reported deforestation -----------------------*

	* Self-reported tree removal items, all labelled 1 = yes, 0 = no
	label define yesno 0 "no" 1 "yes"
	label variable p7_4 "Removed trees in enrolled forest (=1)"
	label variable p11_33 "Removed trees in HH land (=1)"
	label variable p11_38 "Removed big trees in HH land (=1)"
	label variable p11_42 "Removed small trees in HH land (=1)"
	label values p7_4 p11_33 p11_38 p11_42 yesno

	* Reported to have removed any tree: maximum of the three HH-land items
	* (plain comment; the former triple-slash marker joined this line to the next command)
	egen removed_tree = rowmax(p11_33 p11_38 p11_42)
	label variable removed_tree "Removed any tree in HH land (=1)"

	* Perceived monitoring (probability of the program noticing):
	* frequency table of p7_5 (user-written command), display only
	fre p7_5

	* Ln income
	generate ln_income = ln(p4_4)
	label variable ln_income "Income in Follow-Up (Ln)"

*-------------------  IV. Attitudes towards the environment --------------------*
/*Recoded as higher is better (pro-environment)*/

	* Damage environment if it improves life conditions (codes 1 and 3 swapped)
	recode p12_5 (3 = 1) (1 = 3), gen(att_envir_1)  
	label var att_envir_1 "In order to improve your lifestyle, sometimes you need to damage the environment."

	* Improve income if protecting the environment (copied as is)
	gen att_envir_2 = p12_6
	label var att_envir_2 "You can increase your income if you protect the environment."
	
	* If neighbours do not do anything to benefit the environment, you should not do anything either (codes 1 and 3 swapped)
	recode p12_7 (3 = 1) (1 = 3), gen(att_envir_3)  
	tab p12_7 att_envir_3
	label var att_envir_3 "If your neighbors do nothing to benefit the environment, no one should expect you to do anything to benefit the environment"

		* Attitudes towards the environment: sum of the three items
		* (plain comment; the former triple-slash marker joined this line to the next command)
		egen attitudes_environment=rowtotal(att_envir_1 att_envir_2 att_envir_3), mis
		label var attitudes_environment "Pro-environment attitudes"
			* Standardizing with the mean and sd of every observation that has a
			* treatment status (both arms, not the control group alone)
			sum attitudes_environment if treatment!=.
			gen attitudes_environmentstd = (attitudes_environment-`r(mean)')/`r(sd)'
			label var attitudes_environmentstd "Pro-environmental attitudes (std)"

			
*-------------------  IV. Attitudes towards the community --------------------*

/*Recoded as higher is better (pro-community)*/


	
	* Trust in NGOs actions (2 recoded to 0)
	recode p12_3 (2=0), gen(att_commu_1)
	
	* Trust in government (2 recoded to 0)
	recode p12_4 (2=0), gen(att_commu_2)	
		* Attitudes towards the community: sum of the two items
		egen attitudes_community=rowtotal(att_commu_1 att_commu_2), mis
		label var attitudes_community "Pro-trust attitudes"		
			* Standardizing with the mean and sd of every observation that has a
			* treatment status (both arms, not the control group alone)
			sum attitudes_community if treatment !=.
			gen attitudes_communitystd = (attitudes_community-`r(mean)')/`r(sd)'	
		label var attitudes_communitystd "Pro-trust attitudes (std)"		
				
*------------------------------  VI. Use of the money -------------------------*
	
	* Total money received from PSA (clone of p6_5)
	clonevar money_psa = p6_5
	label variable money_psa "Money received from ALL PSA pilot"

	* Money spent: row sum over p6_7_1 through p6_7_7, missing if all are missing
	egen spend_psa = rowtotal(p6_7_1-p6_7_7), mis
	label variable spend_psa "Money spend from ALL PSA pilot"

	* Share of money spent
	generate perc_spent_psa = spend_psa / money_psa
	label variable perc_spent_psa "Share of PSA money spent"

	* Share spent in each category, in percent of total spending
	forvalues k = 1/7 {
		generate perc_p6_7_`k' = 100 * (p6_7_`k' / spend_psa)
	}

	* Suffix every variable except the ID with _fu (treatment included), then save
	ds code_id, not
	local to_suffix `r(varlist)'
	foreach v in `to_suffix' {
		rename `v' `v'_fu
	}
	save "${temp}\endline.dta", replace
	

*===============================================================================*
*								5. Treatment compliance
*===============================================================================*
	* Contract file: numeric compliance indicator per participant
	import delimited "${raw}\Lacandona_pilot_contracts.txt", clear
	rename (participant_code comply) (code_id comply_str)
	* 1 for Yes, 0 for No, missing for any other text
	generate comply = cond(comply_str == "Yes", 1, cond(comply_str == "No", 0, .))
	drop contract_type
	tostring code_id, replace
	* NOTE(review): this tempfile is declared but nothing visible writes to it;
	* the data are saved to the temp folder instead
	tempfile compliance
	save "${temp}\compliance.dta", replace
	
	
*===============================================================================*
*								6. Payments
*===============================================================================*
	* Payments file: numeric compliance and contract-type indicators
	import delimited "${raw}\Lacandona_pilot_payments.txt", clear
	rename (participant_code comply) (code_id comply_str)
	* 1 for Yes, 0 for No, missing for any other text
	generate comply = cond(comply_str == "Yes", 1, cond(comply_str == "No", 0, .))
	tostring code_id, replace

	* 1 for the Full contract, 0 for the Standard contract, missing otherwise
	generate treat = cond(contract_type == "Full", 1, cond(contract_type == "Standard", 0, .))
	label define treatst 0 "Standard contract" 1 "Full enrollment"
	label values treat treatst

	* Enrolled hectares by contract type (this also sorts the data by treat)
	bys treat: sum enrolled_hectares
	save "${temp}\payments.dta", replace
	
*===============================================================================*
*								7. Pixel data
*===============================================================================*	



*-------------------------------------------------------------------------------*	
*------------------------------- Cleaning pixel data ---------------------------*	
*-------------------------------------------------------------------------------*
{
	* Purpose: build the pixel-level analysis file. Loads the monthly pixel
	* classifications, derives forest / no-forest and forest-loss indicators for
	* several 2022 month pairs, merges compliance, overwrites the loss outcome of
	* a fixed list of polygons with a sample mean, and saves pixel_data_final.dta.
	use "${raw}\pixel_data.dta", clear
* Polygons with no contract area get contract set to 0 further below.
	
	* Re-key polygon 5303_1 as 5305_1 (hard-coded correction; the reason is not shown here)
	replace polygon_id="5305_1" if polygon_id=="5303_1"
	unique polygon_id if manual_lab!=.
	* The part of polygon_id before the underscore becomes the landowner ID
	split polygon_id, parse("_")
	rename polygon_id1 code_id
	
	* Keep Contract (continuous version) variable
	* Copies are taken before contract is zeroed below and before conafor is
	* dichotomized near the end of this block
	gen contract_cont=contract
	gen conafor_cont=conafor
	
	* Recode those polygons with no contract area
		replace contract=0 if inlist(polygon_id,"1102_136", "3502_2", "3504_133", "4101_102", "4506_9", "5202_1", "5305_113")
	

	
	* Gen forest loss in pixel
	
			/* A pixel counts as no forest for a month pair only when both
			months are classified 0 (two observations of no forest).
			Any other combination, including a missing month, is coded as
			forest by the recode that follows each gen. */
		
	* May/ July 2022
	gen noforest_mayjuly2022=(may2022==0 & july2022==0) /*1 if no forest in the two months*/
		recode noforest_mayjuly2022 (1=0)(0=1), gen(forest_mayjuly2022)
	* April/May 2022	
	gen noforest_aprmay2022=(april2022==0 & may2022==0)
		recode noforest_aprmay2022 (1=0)(0=1), gen(forest_aprmay2022)
	* May/June 2022	
	gen noforest_mayjune2022=(may2022==0 & june2022==0)
	   	recode noforest_mayjune2022 (1=0)(0=1), gen(forest_mayjune2022)		
	* June/July 2022	
	gen noforest_junejuly2022=(june2022==0 & july2022==0)
		recode noforest_junejuly2022 (1=0)(0=1), gen(forest_junejuly2022)		
	* July/August 2022		
	gen noforest_julyaug2022=(july2022==0 & august2022==0)
		recode noforest_julyaug2022 (1=0)(0=1), gen(forest_julyaug2022)		
	* August/September 2022	
	gen noforest_augsept2022=(sept2022==0 & august2022==0)
		recode noforest_augsept2022 (1=0)(0=1), gen(forest_augsept2022)		
	

	* Baseline forest status from May 2021 and June 2021 (same two-month rule)
	gen no_forest_mayjune2021=(may2021==0 & june2021==0)
		recode no_forest_mayjune2021(1=0)(0=1), gen(forest_mayjune2021)		


	
	
	* Gen forest loss var
	* For each 2022 month pair, three loss indicators that differ only in the
	* baseline they subtract. The difference is recoded in this order:
	* +1 (forest gained) becomes 0, then -1 (forest lost) becomes 1.
	foreach d in aprmay mayjune mayjuly junejuly julyaug augsept   {
		* With May 2021
		gen forest_loss_`d'=forest_`d'2022-may2021
		replace forest_loss_`d'=0 if forest_loss_`d'==1
		replace forest_loss_`d'=1 if forest_loss_`d'==-1
		la var forest_loss_`d' "Forest loss in `d' 2022 (=1)"
		
		* With MANUAL May 2021
		gen f_lossmanual_`d'=forest_`d'2022-manual_lab
		replace f_lossmanual_`d'=0 if f_lossmanual_`d'==1
		replace f_lossmanual_`d'=1 if f_lossmanual_`d'==-1
		la var f_lossmanual_`d' "Forest loss in `d' 2022 (=1) (with Manual 2021)"
		
		
		* With May/June 2021
		gen forloss_`d'=forest_`d'2022-forest_mayjune2021
		replace forloss_`d'=0 if forloss_`d'==1
		replace forloss_`d'=1 if forloss_`d'==-1
		la var forloss_`d' "Forest loss in `d' 2022 (=1) (May/June 2021)"
		
	}

	* Gen change in forest cover variable  (Let's only keep tables where we define forest at baseline as May/June 2021.)
	* Unlike the loss indicators, these keep the signed difference (-1, 0, +1)
	foreach d in aprmay mayjune mayjuly junejuly julyaug augsept   {
		* With May/June 2021
		gen deltaforest_`d'=forest_`d'2022-forest_mayjune2021
		la var deltaforest_`d' "Change in forest cover in `d' 2022 (May/June 2021)"
		
		* With manual May 2021
		gen deltaforestmanual_`d'=forest_`d'2022-manual_lab
		la var deltaforestmanual_`d' "Change in forest cover in `d' 2022 (May 2021) (Manual)"
	}
		* The change in forest cover should use all of the landowner's pixels, not just the ones that were forest at baseline.
	
	* Gen previous deforestation
	* Forest status for May/July 2021 (two-month rule) minus June 2020, recoded
	* like the loss indicators: +1 becomes 0, then -1 becomes 1
	gen noforest_mayjuly2021=(may2021==0 & july2021==0) 
		recode noforest_mayjuly2021 (1=0)(0=1), gen(forest_mayjuly_2021)
	
	gen previous_defo_2021=forest_mayjuly_2021-june2020
		replace previous_defo_2021=0 if previous_defo_2021==1
		replace previous_defo_2021=1 if previous_defo_2021==-1
		
	* Display-only check for landowners 1505 and 5306, before the exclusion below
	sum forest_loss_augsept if code_id=="1505" | code_id=="5306"

	***** Exclude observations that were deforested at baseline
	* Each loss indicator is set to missing where its own baseline shows no forest

		foreach d in aprmay mayjune mayjuly junejuly julyaug augsept {
		replace forest_loss_`d'=. if may2021==0
		replace forloss_`d'=. if may2021==0 & june2021==0
		replace f_lossmanual_`d'=. if manual_lab == 0
		
		}
	* Same display-only check, after the exclusion
	sum forest_loss_augsept if code_id=="1505" | code_id=="5306"
	
	* Replace polygons with sample mean deforestation among those who complied and we didn't impute

	* Identified polygons that are imputed	
	* NOTE(review): 3201_103 is listed twice, so four distinct polygons are flagged
	gen flag_impute=inlist(polygon_id, "2202_107", "3201_103", "5202_24", "6306_3", "3201_103")
	* Merge compliance
	* No nogen option, so a _merge variable is added; unmatched rows from either
	* side are kept at this point
	merge m:1 code_id using "${temp}\compliance.dta"
	
		* For post-revision ad-hoc analysis (6/13). Keeping individuals 1505 and 5306
		* Their pixels are set aside here, before the status filter, and appended back below
		preserve
			keep if code_id=="1505" | code_id=="5306"
			tempfile temporaryaddition
			save `temporaryaddition'
		restore
	
	
	
		* Keep only rows whose contract status is Accepted
		keep if status_contract=="Accepted"
		foreach var in forloss_mayjuly forloss_aprmay forloss_mayjune forloss_junejuly forloss_julyaug forloss_augsept {
		* Complied and not impute
		* Mean loss among complying, non-flagged pixels overwrites every
		* non-missing value in the flagged polygons
		sum `var' if flag_impute==0 & comply ==1
		local mean_`var'=r(mean)
		dis `mean_`var''
		replace `var'=`mean_`var'' if inlist(polygon_id, "2202_107", "3201_103", "5202_24", "6306_3", "3201_103")	& `var'!=.
		}		

	

	
	
	* Add temporarily IDs 1505 and 5306
	* NOTE(review): if either ID also has status Accepted, its pixels are now
	* in the data twice - confirm that both were dropped by the filter above
	append using `temporaryaddition'
	* d_conafor keeps the conafor values as they are before the dichotomization below
	clonevar d_conafor=conafor
	
	*Continuous for Non-Conafor
	gen nonconafor=1-d_conafor
	clonevar d_nonconafor=nonconafor

	
	
	* Save dataset for pixel-level analysis 
		* Making the variables dichotomous 
		* Values strictly between 0 and 1 are set to 0; nonconafor is then
		* redefined as the complement of the dichotomized conafor
		replace property=0 if property>0 & property<1
		replace conafor=0 if conafor>0 & conafor<1
		replace nonconafor=0 if conafor==1 
		replace nonconafor=1 if conafor==0
		
		
		* Drop if pixel outside of land boundary mapping
		* (after the step above this also drops pixels with a fractional property value)
		drop if property==0
		
	save "${temp}\pixel_data_final.dta", replace
	

	
}


*-------------------------------------------------------------------------------*	
*-------------------------- Collapsing at the polygon level --------------------*	
*-------------------------------------------------------------------------------*	
{
	use "${temp}\pixel_data_final.dta", clear	

*	Purpose: aggregate the pixel-level data to one row per polygon_id, compute
*		  deforestation shares and save defo_pixel_polygonlevel_v4.dta.
*	Note: Each variable of interest is multiplied by an area weight (contract,
*		  property, property*d_conafor or property*nonconafor) and summed within
*		  polygon, in order to account for the partial areas cover in the pixels.
*	      A single egen total() per variable is enough: the total is already
*		  constant within polygon, so no further max() step is required.
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

	* Area definitions (variable-name suffix) and the weight used for each one
	local areas contract property conafor nocona
	local w_contract contract
	local w_property property
	local w_conafor  property*d_conafor
	local w_nocona   property*nonconafor

	* Predicted pixels with forest: output stub -> pixel-level source variable
	*   predforest_june2020_* : forest in June 2020
	*   predforest_may2021_*  : forest in May 2021
	*   predfor_may2021_*     : forest_mayjune2021
	local src_predforest_june2020 june2020
	local src_predforest_may2021  may2021
	local src_predfor_may2021     forest_mayjune2021

	foreach stub in predforest_june2020 predforest_may2021 predfor_may2021 {
		foreach a of local areas {
			tempvar w
			gen `w'=`src_`stub''*`w_`a''
			bys polygon_id: egen `stub'_`a'=total(`w')
			drop `w'
		}
	}

	* Deforested areas in 2022
	* Start from an empty list so that no stale contents of this local can leak
	* into the collapse below.
	local forestloss ""
	foreach d in aprmay mayjune mayjuly junejuly julyaug augsept {

		foreach a of local areas {
			foreach v in forest_loss forloss {
				tempvar w
				gen `w'=`v'_`d'*`w_`a''
				bys polygon_id: egen `v'_`d'_`a'=total(`w')
				drop `w'
			}
		}

		* Variables to carry through the collapse (forest_loss_* first, then forloss_*)
		foreach v in forest_loss forloss {
			foreach a of local areas {
				local forestloss `forestloss' `v'_`d'_`a'
			}
		}
	}

	* Previous deforestation
	foreach a of local areas {
		tempvar w
		gen `w'=previous_defo_2021*`w_`a''
		bys polygon_id: egen previous_defo_`a'=total(`w')
		drop `w'
		local forestloss `forestloss' previous_defo_`a'
	}

	* predfor* picks up every predforest_* and predfor_* total created above
	global forestloss `forestloss' predfor*

	* Collapse by polygon id (all listed variables are constant within polygon)
	collapse (mean) $forestloss, by(polygon_id)

* Deforested areas as a share of contracted/property areas

	* Wording used in the variable labels for each area definition
	local lab_contract  "area contracted"
	local lab_property  "property area"
	local lab_conafor   "conafor area"
	local lab_nocona    "noconafor area"
	local plab_contract "area contracted"
	local plab_property "property area"
	local plab_conafor  "conafor area"
	local plab_nocona   "non-conafor area"

	* Previous
	foreach a of local areas {
		gen defp_previous_`a'=previous_defo_`a'/predforest_june2020_`a'
			la var defp_previous_`a' "(Previous) Deforestation as % of `plab_`a'' to may 2021 (base pixel)"
	}

	foreach d in aprmay mayjune mayjuly junejuly julyaug augsept {
		foreach a of local areas {
			* Share relative to forest in May 2021
			gen defp_`d'_`a'=forest_loss_`d'_`a'/predforest_may2021_`a'
				la var defp_`d'_`a' "Defo as % of `lab_`a'' to `d' 2022 (May 2021)"
			* Share relative to forest_mayjune2021
			gen defp2_`d'_`a'=forloss_`d'_`a'/predfor_may2021_`a'
				la var defp2_`d'_`a' "Defo as % of `lab_`a'' to `d' 2022 (May/June 2021)"
		}
	}


		* Rename variables with px prefix
		ds polygon_id, not
		foreach var in `r(varlist)' {
			rename `var' px_`var'
		}



		*Polygons with no contracted area:
		ds *conafor* *contract*
	foreach var in  `r(varlist)'  {
			replace `var'=0 if inlist(polygon_id,"1102_136", "3502_2", "3504_133", "4101_102", "4506_9", "5202_1", "5305_113")
	}


		* Saving temp
		save "${temp}\defo_pixel_polygonlevel_v4.dta", replace		
}	


*-------------------------------------------------------------------------------*	
*----------------------- Collapsing at the individual level --------------------*	
*-------------------------------------------------------------------------------*	
{
	use "${temp}\pixel_data_final.dta", clear	
	
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*	Purpose: aggregate the pixel-level data to one row per individual (code_id),
*		  compute deforestation shares and save defo_pixel_indlevel_v4.dta.
*	Note: Each variable of interest is multiplied by an area weight (contract,
*		  property, property*d_conafor or property*nonconafor) and summed within
*		  individual, in order to account for the partial areas cover in the pixels.
*	      A single egen total() per variable is enough: the total is already
*		  constant within individual, so no further max() step is required.
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
	
 *Not deforested in baseline (area with forest in baseline)
 * NOTE(review): the not_deforested_* variables are not part of the collapse
 * list below, so they do not reach the saved file -- confirm this is intended.
	gen not_defo_bl=(may2021==1)
		* By individual
		bys code_id: egen not_deforested_area=total(not_defo_bl)	
		* In property area
		gen tmp_1=not_defo_bl*property 
		bys code_id: egen not_deforested_property=total(tmp_1)
		* In Conafor area 
		gen tmp_2=not_defo_bl*property*d_conafor
		bys code_id: egen not_deforested_conafor=total(tmp_2)
	
	drop tmp_*

	* Area definitions (variable-name suffix) and the weight used for each one
	local areas contract property conafor nocona
	local w_contract contract
	local w_property property
	local w_conafor  property*d_conafor
	local w_nocona   property*nonconafor

	* Predicted pixels with forest: output stub -> pixel-level source variable
	*   predforest_june2020_* : forest in June 2020
	*   predforest_may2021_*  : forest in May 2021
	*   predfor_may2021_*     : forest_mayjune2021
	local src_predforest_june2020 june2020
	local src_predforest_may2021  may2021
	local src_predfor_may2021     forest_mayjune2021

	foreach stub in predforest_june2020 predforest_may2021 predfor_may2021 {
		foreach a of local areas {
			tempvar w
			gen `w'=`src_`stub''*`w_`a''
			bys code_id: egen `stub'_`a'=total(`w')
			drop `w'
		}
	}
		
	* Deforested areas in 2022
	global forestloss ""
	local forestloss ""
	foreach d in aprmay mayjune mayjuly junejuly julyaug augsept {

		foreach a of local areas {
			foreach v in forest_loss forloss {
				tempvar w
				gen `w'=`v'_`d'*`w_`a''
				bys code_id: egen `v'_`d'_`a'=total(`w')
				drop `w'
			}
		}

		* Variables to carry through the collapse (forest_loss_* first, then forloss_*)
		foreach v in forest_loss forloss {
			foreach a of local areas {
				local forestloss `forestloss' `v'_`d'_`a'
			}
		}
	}
	
	* Previous deforestation
	foreach a of local areas {
		tempvar w
		gen `w'=previous_defo_2021*`w_`a''
		bys code_id: egen previous_defo_`a'=total(`w')
		drop `w'
		local forestloss `forestloss' previous_defo_`a'
	}

	* predfor* picks up every predforest_* and predfor_* total created above
	global forestloss `forestloss' predfor*
		
	* Checking selected individuals (log output only).
	* The interactive -browse- call was removed: it is a GUI command that has
	* no effect on the data and can stop a batch/console run of this script.
	foreach id in 1506 2101 3201 5306 6305 {
		sum property d_conafor conafor nonconafor if code_id == "`id'"
	}
	
	* Collapse by individual (all listed variables are constant within code_id)
	collapse (mean) $forestloss, by(code_id)

* Deforested areas as a share of contracted/property areas

	* Wording used in the variable labels for each area definition
	local lab_contract  "area contracted"
	local lab_property  "property area"
	local lab_conafor   "conafor area"
	local lab_nocona    "noconafor area"
	local plab_contract "area contracted"
	local plab_property "property area"
	local plab_conafor  "conafor area"
	local plab_nocona   "non-conafor area"

	* Previous
	foreach a of local areas {
		gen defp_previous_`a'=previous_defo_`a'/predforest_june2020_`a'
			la var defp_previous_`a' "(Previous) Deforestation as % of `plab_`a'' to may 2021 (base pixel)"
	}

	foreach d in aprmay mayjune mayjuly junejuly julyaug augsept {
		foreach a of local areas {
			* Share relative to forest in May 2021
			gen defp_`d'_`a'=forest_loss_`d'_`a'/predforest_may2021_`a'
				la var defp_`d'_`a' "Defo as % of `lab_`a'' to `d' 2022 (May 2021)"
			* Share relative to forest_mayjune2021
			gen defp2_`d'_`a'=forloss_`d'_`a'/predfor_may2021_`a'
				la var defp2_`d'_`a' "Defo as % of `lab_`a'' to `d' 2022 (May/June 2021)"
		}
	}
	
	
		* Rename variables with px prefix
		ds code_id, not
		foreach var in `r(varlist)' {
			rename `var' px_`var'
		}
	


		* Saving temp
		save "${temp}\defo_pixel_indlevel_v4.dta", replace		
}	

	
	
*===============================================================================*
*								8. Merging data 
*===============================================================================*
	

	* Build the individual-level analysis file: baseline + compliance + endline
	* + plot-level + deforestation + payments + pixel aggregates.
	use "${temp}\baseline.dta", clear
	*keep if _merge_bl==3
	merge 1:1 code_id using "${temp}\compliance.dta"

	* Keep participants whose contract status is "Accepted" or "Rejected"
	keep if status_contract=="Accepted" | status_contract=="Rejected" // N=65	
	
		* There is no polygon data for individual 1505
		drop if code_id=="1505"
	merge 1:1 code_id using "${temp}\endline.dta", gen(merge_fu)	

	* Analysis-sample flag; rows that only exist in a using file get 0 because
	* their status_contract is empty.
	gen analysis_fu=status_contract=="Accepted"| status_contract=="Rejected"
	
		* Attritor if BL but not in FU
		* (rows found only in the endline file, merge_fu==2, stay missing)
		gen attritor=.
			replace attritor=1 if merge_fu==1
			replace attritor=0 if merge_fu==3
			label var attritor "Attrition (=1)"
			
	* NOTE(review): plotlevel.dta is collapsed by(code_id contract_type) in
	* section 2; this 1:1 merge requires code_id alone to be unique there.
	merge 1:1 code_id using "${temp}\plotlevel.dta", gen(merge_plot)
	merge 1:1 code_id using "${temp}\deforestation.dta", gen(merge_defor)
	* Payments
	merge 1:1 code_id using  "${temp}\payments.dta", gen(merge_payment)	

	
	* Merge pixel data (at the individual level)
	
	merge 1:1 code_id using "${temp}\defo_pixel_indlevel_v4.dta", gen(merge_defor_pixel)	
	
	global hetvars ingre_24_bl dist_carr_210_bl pendiente_49_perc_bl attitudes_environmentstd_bl attitudes_communitystd_bl  risk_aversion_bl 

	* Imputation: flag missing values (mis_*) and replace them, within the
	* analysis sample, by the analysis-sample mean.
	foreach var in dist_carr_210_bl risk_aversion_bl pendiente_49_perc_bl attitudes_communitystd_bl attitudes_environmentstd_bl ingre_24_bl land_livestock_bl land_crops_bl {
		summ `var' if analysis_fu==1
		gen mis_`var'=(`var'==.)
		* Use r(mean) directly rather than through macro expansion, which
		* would reformat the number before it is stored. -gen- above does not
		* clear r(), so the mean from -summ- is still available here.
		replace `var'= r(mean) if `var'==. & analysis_fu ==1
	}
	
	compress
	save "${analysis}\analysis_v4.dta", replace
	
	
	
	exit
	